This geospatial model uses routinely collected malaria case data, population data and remotely sensed data, such as open and vegetated water bodies, to estimate population living around open water bodies, and ultimately quantify the association between proximity to larval habitat and malaria risk in health facility catchment areas in Kasungu. The buffer layer around waterbodies are created and then combined with population data in raster format to estimate the total population that is within the buffer. The data used spans from 2015 to 2020 and was derived from digitized DHIS2 malaria records, accessibility mapping, aggregated population geospatial layer and TropWet tool in Google Earth Engine.

Load packages

Loading the R packages that will be used to read in, view, transform and model the malaria cases and spatial datasets.

library(spatialEco)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(plotly)
library(lubridate)
library(knitr)
library(raster)
library(rgdal)
library(rgeos)
library(sf)
library(sp)
library(tmap)
library(spdep)
library(maptools)
library(gridExtra)
library(grid)
library(exactextractr)
library(DataExplorer)
library(mapview)
`%>%` <- magrittr::`%>%`

Tell R where the data is

here::here()
## [1] "C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021"

Load datasets

The total dry season malaria cases recorded at health-care facilities in Kasungu from 2017 to 2019 are contained in the KasunguData.csv sourced from https://dhis2.health.gov.mw/. The kasungu_facility_catchments_2004.shp shapefile also contains the population and health information within each health-facility catchment area in Kasungu district. The aggregated population raster layers for Malawi e.g.,ku_pop_2017_1km_aggregated.tif were downloaded from the Open Spatial and Demographic and Data Research website: https://www.worldpop.org/geodata/country?iso3=MWI. These layers estimate total number of people per grid-cell. The units are number of people per pixel with country totals adjusted to match the corresponding official United Nations population estimates. The datasets were downloaded in Geotiff at a resolution of 1km and are projected in Geographic Coordinate System, WGS84. The kasungu_water.shpand water_bodies layers contain open and vegetated waterbodies polygons, detected using the Tropical Wetland Unmixing Tool (TropWet). TropWet is a Google Earth Engine hosted toolbox that uses the Landsat archive to map tropical wetlands and can be accessed through: https://www.aber.ac.uk/en/dges/research/earth-observation-laboratory/research/tropwet/

# 2017, 2018 and 2019 dry season malaria cases 
dry_season_malaria_2015_2019 <- read.csv(here::here("data", "dry_season_malaria 2015-2019.csv"))

# Explore malaria data
# dry_season_malaria_2015_2019 %>% dplyr::glimpse() %>% 
#   DataExplorer::introduce() %>% 
#   DataExplorer::plot_missing()

# Export 2017, 2018 and 2019 dry season malaria cases
write.csv(dry_season_malaria_2015_2019, file = "data/dry_season_malaria_2017_2019.csv")

write.table(dry_season_malaria_2015_2019, file = "dry_season_malaria_2017_2019.txt", 
            sep = ",", quote = FALSE, row.names = FALSE)

# 2020 dry season malaria cases
ku_malaria_2020 <- read.csv(here::here("data", "dry_season_malaria_2020.csv"))

ku_malaria_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 9
## $ X              <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ~
## $ Names          <chr> "Anchor Farm Health Centre", "Bua Health Centre (Kasung~
## $ May.2020       <int> 1172, 2293, 855, NA, 258, 806, 2468, 1327, 1190, 81, 80~
## $ June.2020      <int> 571, 1310, 24, 50, 126, 551, 1756, 66, 1102, 26, 511, N~
## $ July.2020      <int> 438, 634, 143, 37, 242, 314, 1691, 799, 667, 14, 407, N~
## $ August.2020    <int> 184, 482, 122, 137, 259, 146, 1177, 640, 375, 6, 372, N~
## $ September.2020 <int> 214, 565, 115, 117, 225, 202, 1240, 626, 270, 8, 413, N~
## $ October.2020   <int> 258, 910, 231, 201, 276, 392, 1296, 738, 353, 12, 494, ~
## $ dr_2020        <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 14~
# Merge 2015 to 2019 dry season malaria case data with 2020 data
dry_season_malaria_2017_2020 <- cbind.data.frame(dry_season_malaria_2015_2019, ku_malaria_2020) 
 
dry_season_malaria_2017_2020 <- dry_season_malaria_2017_2020[,c("FID", "Names", "dr_2017", 
                                                                "dr_2018", "dr_2019", "dr_2020",
                                                                "LONGITU", "LATITUD")]

# Export 'dry season malaria 2017-2020' as csv
write.csv(dry_season_malaria_2017_2020, file = "data/dry_season_malaria_2017_2019.csv")

dry_season_malaria_2017_2020 %>% dplyr::glimpse()
## Rows: 36
## Columns: 8
## $ FID     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names   <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
# Kasungu district boundary shapefile 
kasungu_district <- st_read(here::here("data", "kasungu_district.shp"))
## Reading layer `kasungu_district' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_district.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1 feature and 5 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 491272.7 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
# Kasungu health facility catchment boundary shapefiles
malire <- sf::st_read(here::here("data", "kasungu_health_facility_catchments.shp")) # outdated
## Reading layer `kasungu_health_facility_catchments' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_health_facility_catchments.shp' using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 21 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 491272.8 ymin: 8494349 xmax: 609044.2 ymax: 8632164
## Projected CRS: WGS 84 / UTM zone 36S
malire_new <- sf::st_read(here::here("data", "Kasungu_new_health_fac_catchment_clip.shp")) # generated from accessibility mapping
## Reading layer `Kasungu_new_health_fac_catchment_clip' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\Kasungu_new_health_fac_catchment_clip.shp' using driver `ESRI Shapefile'
## Simple feature collection with 29 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 491878.5 ymin: 8494645 xmax: 609044.2 ymax: 8631908
## Projected CRS: WGS 84 / UTM zone 36S
# malire_new <- spTransform(malire_new, proj4string(malire_new))

# Kasungu population raster layer
kasungu_population_2017 <- raster(here::here("data", "ku_pop_2017_1km_aggregated.tif"))
kasungu_population_2018 <- raster(here::here("data", "ku_pop_2018_1km_aggregated.tif"))
kasungu_population_2019 <- raster(here::here("data", "ku_pop_2019_1km_aggregated.tif"))
kasungu_population_2020 <- raster(here::here("data", "ku_pop_2020_1km_aggregated.tif"))

# Open waterbodies polygons 
water_2017 <- st_read(here::here("data", "water_bodies_2017.shp"))
## Reading layer `water_bodies_2017' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2017.shp' using driver `ESRI Shapefile'
## Simple feature collection with 168 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 514497 ymin: 8495941 xmax: 603149.8 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
water_2018 <- st_read(here::here("data", "kasungu_2018_water.shp"))
## Reading layer `kasungu_2018_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2018_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1105 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 496807.6 ymin: 8494693 xmax: 607913.8 ymax: 8607747
## Projected CRS: WGS 84 / UTM zone 36S
water_2019 <- st_read(here::here("data", "kasungu_2019_water.shp"))
## Reading layer `kasungu_2019_water' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\kasungu_2019_water.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1941 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 494197.2 ymin: 8494693 xmax: 607913.8 ymax: 8617573
## Projected CRS: WGS 84 / UTM zone 36S
water_2020 <- st_read(here::here("data", "water_bodies_2020.shp"))
## Reading layer `water_bodies_2020' from data source `C:\Users\cnkolokosa\Documents\R\upscaled_2021_updated_May\upscaled_2021\data\water_bodies_2020.shp' using driver `ESRI Shapefile'
## Simple feature collection with 266 features and 1 field
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 508985.6 ymin: 8495793 xmax: 585761.1 ymax: 8620169
## Projected CRS: WGS 84 / UTM zone 36S
# Add a field ID to water bodies polygons 
water_2017$ID <- 1:nrow(water_2017)
water_2018$ID <- 1:nrow(water_2018)
water_2019$ID <- 1:nrow(water_2019)
water_2020$ID <- 1:nrow(water_2020)

View the dry season malaria case data

We observe that Kasungu district has 30 health facilities classified as dispensary, health centre, district hospital and rural hospital and the highest malaria cases were recorded at Kasungu District Hospital.

dry_season_malaria_2017_2020 %>% 
  glimpse() %>% 
  summary()
## Rows: 36
## Columns: 8
## $ FID     <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
## $ Names   <chr> "Anchor Farm", "Bua Health Centre", "Chamama Health Facility",~
## $ dr_2017 <int> 2966, 3489, 1878, NA, 3601, 2292, 5801, 2192, 2745, NA, NA, NA~
## $ dr_2018 <int> 3142, 1804, 1750, NA, 4027, 2116, 4502, 2394, 4138, NA, NA, NA~
## $ dr_2019 <int> 1978, 1740, 1508, NA, 1686, 1770, 5470, 2553, 2200, NA, NA, NA~
## $ dr_2020 <int> 2837, 6194, 1490, 542, 1386, 2411, 9628, 4196, 3957, 147, 3003~
## $ LONGITU <dbl> 33.39000, 33.53859, 33.77686, NA, 33.68528, 33.79701, 33.30816~
## $ LATITUD <dbl> -13.41000, -13.29403, -12.92431, NA, -13.10757, -12.97452, -12~
##       FID           Names              dr_2017         dr_2018     
##  Min.   : 1.00   Length:36          Min.   :  427   Min.   :  749  
##  1st Qu.: 9.75   Class :character   1st Qu.: 2196   1st Qu.: 2424  
##  Median :18.50   Mode  :character   Median : 3132   Median : 3357  
##  Mean   :18.50                      Mean   : 3586   Mean   : 3909  
##  3rd Qu.:27.25                      3rd Qu.: 3993   3rd Qu.: 4684  
##  Max.   :36.00                      Max.   :16289   Max.   :15821  
##                                     NA's   :6       NA's   :6      
##     dr_2019         dr_2020         LONGITU         LATITUD      
##  Min.   :  533   Min.   :    0   Min.   :33.18   Min.   :-13.57  
##  1st Qu.: 1748   1st Qu.: 2286   1st Qu.:33.38   1st Qu.:-13.25  
##  Median : 2556   Median : 3824   Median :33.50   Median :-12.98  
##  Mean   : 2880   Mean   : 4657   Mean   :33.52   Mean   :-12.99  
##  3rd Qu.: 3284   3rd Qu.: 6212   3rd Qu.:33.68   3rd Qu.:-12.79  
##  Max.   :10721   Max.   :24424   Max.   :33.87   Max.   :-12.42  
##  NA's   :6                       NA's   :6       NA's   :6
dry_season_malaria_2017_2020 %>%  
  plotly::plot_ly(y = ~Names,
                  x = ~dr_2017,
                  type = "bar",
                  orientation = 'h',
                  name = "2017") %>%
  plotly::add_trace(x = ~ dr_2018,
                    name = "2018") %>%
  plotly::add_trace(x = ~ dr_2019,
                    name = "2019") %>% 
  plotly::add_trace(x = ~ dr_2020,
                    name = "2020") %>% 
  plotly::layout(#barmode = "stack",
                 xaxis = list(title = "Total malaria cases"),
                 yaxis = list(title = ""),
                 hovermode = "compare",
                 margin = list(b = 10,
                               t = 10,
                               pad = 2))

Fig.1 The total malaria cases recorded at each health-care facility in Kasungu district

View location of Kasungu health-care facilities within health facility catchment area

Heath facility catchment area is the area from which a health facility attracts patients. The new health facility catchments polygon was generated from generic accessibility mapping script adapted from https://malariaatlas.org/wp-content/uploads/accessibility/R_generic_accessibilty_mapping_script.r The script requires two user supplied datasets: the 2015 friction surface, which is available here: http://www.map.ox.ac.uk/accessibility_to_cities/, and a user-supplied .csv of points dry_season_malaria_2017_2020. The accumulated cost algorithm accCost and r.Cost algorithm in GRASS GIS were run to make the final output map of new health facility catchment boundaries.

# Using the is.na() function to remove health centres with missing longitude and latitude coordinates
# Aggregating Kasalika Health Centre and Kasungu District Hospital, and 
# Kaluluma Rural Hospital and Nkhamenya Rural Hospital malaria cases

# health_facility_aggregated <- dry_season_malaria_2017_2020[!is.na(dry_season_malaria_2017_2020$LONGITU),] %>% 
#   dplyr::filter(Names != "Kasalika Health Centre",
#                 Names != "Bua Health Centre",
#                 Names != "Kaluluma Rural Hospital") %>% 
#   dplyr::mutate(dr_2017 = ifelse(Names == "Kasungu District Hospital", 4528 + 16289, dr_2017), 
#                 dr_2018 = ifelse(Names == "Kasungu District Hospital", 4493 + 15821, dr_2018), 
#                 dr_2019 = ifelse(Names == "Kasungu District Hospital", 2729 + 10721, dr_2018), 
#                 dr_2020 = ifelse(Names == "Kasungu District Hospital", 4368 + 24424, dr_2020),
#                 dr_2017 = ifelse(Names == "Nkhamenya Rural Hospital", 2887 + 752, dr_2017), 
#                 dr_2018 = ifelse(Names == "Nkhamenya Rural Hospital", 851 + 3689, dr_2018), 
#                 dr_2019 = ifelse(Names == "Nkhamenya Rural Hospital", 533 + 4004, dr_2019),
#                 dr_2020 = ifelse(Names == "Nkhamenya Rural Hospital", 3587 + 5929, dr_2020),
#                 dr_2017 = ifelse(Names == "Mziza Health Centre", 3863 + 3489, dr_2017),
#                 dr_2018 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2018),
#                 dr_2019 = ifelse(Names == "Mziza Health Centre", 2815 + 1804, dr_2019),
#                 dr_2020 = ifelse(Names == "Mziza Health Centre", 2397 + 6194, dr_2020)) 

zipatala_aggregated <- dry_season_malaria_2017_2020[complete.cases(dry_season_malaria_2017_2020),] %>% 
  dplyr::filter(Names != "Kasalika Health Centre",
                Names != "Bua Health Centre",
                Names != "Kaluluma Rural Hospital") 

zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4528 + 16289

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4493 +15821

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 2729 + 10721

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Kasungu District Hospital")] <- 4368 + 24424
  
zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 2887 + 752

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 851 + 3689

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 533 + 4004

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Nkhamenya Rural Hospital")] <- 3587 + 5929

zipatala_aggregated$dr_2017[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 3863 + 3489

zipatala_aggregated$dr_2018[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2815 + 1804

zipatala_aggregated$dr_2019[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 2439 + 1740

zipatala_aggregated$dr_2020[which(zipatala_aggregated$Names == "Mziza Health Centre")] <- 6194 + 2397

# There are NA values in dry_season_malaria_2017_2020.csv, replace them by 0 and add a column to keep information
# dry_season_malaria_2017_2020$is_na = ifelse(is.na(dry_season_malaria_2017_2020$LONGITU), TRUE, FALSE)
# index = dry_season_malaria_2017_2020$is_na == TRUE
# dry_season_malaria_2017_2020[index, "LONGITU"] <- 0
# dry_season_malaria_2017_2020[index, "LATITUD"] <- 0
# 
# dry_season_malaria_2017_2020 <- SpatialPointsDataFrame(coords = dry_season_malaria_2017_2020[, 7:8],
#                                    data = dry_season_malaria_2017_2020[,1:6])

# write.csv(zipatala_aggregated, "data/Kasungu_malaria_2017_2020.csv")

health_facility_aggr_sf <- sf::st_as_sf(zipatala_aggregated,
                                        coords = c("LONGITU", "LATITUD"),
                                        crs = 4326, agr = "constant")


# Save as shapefile
# sf::st_write(health_facility_sf, "data/kasungu_zipatala_aggregated.shp", overwrite = TRUE)

# mapview::mapview(health_facility_aggr_sf)

# Set to interactive view
# tmap_mode('view')

old_catchment <- tm_shape(malire)+
  tm_polygons()+
  tm_shape(health_facility_aggr_sf)+
  tm_dots(size = .3, col = "blue", alpha = 0.5)+
  tm_text("Names", size = .3, just = "top", 
          col = "black", remove.overlap = TRUE)+
  tm_layout(frame = FALSE,
            title = "Old catchment boundaries",
            title.size = .8, 
            title.position = c("left", "top"))+
  tm_scale_bar(breaks = c(0, 10, 20), 
               text.size = .5)

new_catchment <- tm_shape(malire_new)+
  tm_polygons()+
  tm_shape(health_facility_aggr_sf)+
  tm_dots(size = .3, col = "blue", alpha = 0.5)+
  tm_text("Names", size = .3, just = "top", 
          col = "black", remove.overlap = TRUE)+
  tm_layout(frame = FALSE,
            title = "New catchment boundaries",
            title.size = .8, 
            title.position = c("left", "top"))+
  tm_compass(position=c("right", "top"))

tmap::tmap_arrange(old_catchment, new_catchment)
Fig 2. Kasungu Health-care Facilities and Catchment Areas

Fig 2. Kasungu Health-care Facilities and Catchment Areas

View population and malaria rate for Malawi by health facility catchment area

Using population and health information within each health facility catchment area we produce a choropleth map colored in proportion to a statistical variable that represents an aggregate summary of a geographic characteristic, in this case total population, population density, malaria rate and total malaria cases.

# Take a look at the variables, CRS and geometry type
# head(malire)

# Function to create a choropleth map from sf object
choroplethmap <- function(df, vname = NA, pal = NA, legend.title = NA){
  # choropleth map
  # df: simple feature polygon layer
  # vname: variable name (as character, in quotes)
  # pal: color palette
  # legend.title: legend title in quotes
  # returns:
  # a tmap element (plots a map)
  tm_shape(df)+
    tm_fill(col = vname, style = "quantile",
            palette = pal, n = 5, title = legend.title)+
    tm_borders()+
    tm_layout(legend.position = c("right","bottom"))
}


population <- choroplethmap(malire, vname = "POPULATION", 
                            pal = "Reds", legend.title = "Total population")

pop_density <- choroplethmap(malire, vname = "DENSITY", 
                             pal = "YlOrRd", legend.title = "Population density")

malaria_rate <- choroplethmap(malire, vname = "MALAR_RATE", 
                              pal = "BuGn", legend.title = "Malaria prevalence %")

malaria_cases <- choroplethmap(malire, vname = "MALARIA_CA", 
                               pal = "Purples", legend.title = "Malaria cases")

tmap_arrange(population, pop_density, malaria_rate, malaria_cases)
Fig.3 Population and health information for each health facility catchment area

Fig.3 Population and health information for each health facility catchment area

View raster population metadata and estimated population per grid-cell

CRS for kasungu_population layers is already in WGS 84 UTM Zone 36 South, which is the base projected coordinate system for Malawi and has units in meters, hence no need to transform it.The highest estimated population per grid-cell is 7,949 people in 2020.

# Check out the CRS and values of the population layers
kasungu_population_2017
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2017_1km_aggregated.tif 
## names      : ku_pop_2017_1km_aggregated
kasungu_population_2018
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2018_1km_aggregated.tif 
## names      : ku_pop_2018_1km_aggregated 
## values     : 0, 6253.557  (min, max)
kasungu_population_2019
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2019_1km_aggregated.tif 
## names      : ku_pop_2019_1km_aggregated 
## values     : 0, 6483.727  (min, max)
kasungu_population_2020
## class      : RasterLayer 
## dimensions : 150, 128, 19200  (nrow, ncol, ncell)
## resolution : 920.0898, 918.7667  (x, y)
## extent     : 491272.7, 609044.2, 8494349, 8632164  (xmin, xmax, ymin, ymax)
## crs        : +proj=utm +zone=36 +south +datum=WGS84 +units=m +no_defs 
## source     : C:/Users/cnkolokosa/Documents/R/upscaled_2021_updated_May/upscaled_2021/data/ku_pop_2020_1km_aggregated.tif 
## names      : ku_pop_2020_1km_aggregated 
## values     : 0, 7949.033  (min, max)
# Function to create a raster population map
populationmap <- function(df, title){
  # population map
  # arguments:
  #   df:  aggregated population raster layer
  #   legend.title: legend title
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_raster(palette = "Reds", title = title,
              breaks = c(0,100,200,400,600,800,1000,2000,4000,6000,8000))+
    tm_layout(legend.position = c("right", "bottom"),
              frame = FALSE)+
    tm_scale_bar(position = c("left", "bottom"))
}
# Set to static map
tmap_mode("plot")

pop_2017 <- populationmap(kasungu_population_2017, title = "2017 Population")

pop_2018 <- populationmap(kasungu_population_2018, title = "2018 Population")

pop_2019 <- populationmap(kasungu_population_2019, title = "2019 Population")

pop_2020 <- populationmap(kasungu_population_2020, title = "2020 Population")

tmap_arrange(pop_2017, pop_2018, pop_2019, pop_2020, nrow = 2) # Layout the maps
Fig.4 Estimated total number of people per 1km grid-cell

Fig.4 Estimated total number of people per 1km grid-cell

Buffer, combine and transform TropWet derived waterbody polygons

Buffers of 30m radius have been created around open waterbodies and the geometries of overlapping polygons are unioned together. st_union returns a single geometry sfc object, which is why st_cast and st_as_sf functions have been used to cast and convert the multipolygon buffer geometries to a dissolved or split polygon geometry collection.

surfaceWater_2017 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2017, dist = 30)), "POLYGON"))

surfaceWater_2018 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2018, dist = 30)), "POLYGON"))

surfaceWater_2019 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2019, dist = 30)), "POLYGON"))

surfaceWater_2020 <- sf::st_as_sf(st_cast(st_union(st_buffer(water_2020, dist = 30)), "POLYGON"))

Create a function for computing buffers around open waterbody polygons

waterbody_buffer <- function(waterbody, distance, catchment){
  
  #Buffer the 'water' vector file by 'distance' meters
  buffer_radiusk <- st_buffer(waterbody, distance)
  
  # Dissolve the buffers
  buffer_radiusk_union <- st_as_sf(st_cast(st_union(buffer_radiusk),"MULTIPOLYGON"))
  
  # Assign Attributes of the 'catchment' to each of the waterbodies. 
  int_radiusk <- st_intersection(buffer_radiusk_union, catchment)
  open_water_buffer <- st_as_sf(int_radiusk)
  
  # Polygons being seen to be in multiple catchments
  st_intersects(open_water_buffer, catchment)
  
  # Make the assumption that the attribute is constant throughout the geometry
  st_agr(open_water_buffer) = "constant"
  st_agr(catchment) = "constant"
  
  return(out = open_water_buffer)
}

Create buffers using Kasungu open waterbodies and health-facility catchment boundary layers

st_buffer has been used to compute 1km, 2km and 3km buffers around each waterbody polygon. Then geometry of the buffer features are then combined resulting in resolved internal boundaries. Invalid waterbody polygons can be checked by using st_is_valid which returns by default NA on corrupt geometries.

# For 2017 TropWet surface water polygons
buffer_1km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 1000, catchment = malire_new)

buffer_2km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 2000, catchment = malire_new)

buffer_3km_2017 <- waterbody_buffer(waterbody = surfaceWater_2017, 
                                    distance = 3000, catchment = malire_new)

# For 2018 TropWet surface water polygons
buffer_1km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 1000, catchment = malire)

buffer_2km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 2000, catchment = malire)

buffer_3km_2018 <- waterbody_buffer(waterbody = surfaceWater_2018, 
                                    distance = 3000, catchment = malire)

# For 2019 TropWet surface water polygons
buffer_1km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 1000, catchment = malire)

buffer_2km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 2000, catchment = malire)

buffer_3km_2019 <- waterbody_buffer(waterbody = surfaceWater_2019, 
                                    distance = 3000, catchment = malire)

# For 2020 TropWet surface water polygons
buffer_1km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 1000, catchment = malire)

buffer_2km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 2000, catchment = malire)

buffer_3km_2020 <- waterbody_buffer(waterbody = surfaceWater_2020, 
                                    distance = 3000, catchment = malire)

View the intersected buffers

# Map the buffers
buffermap <- function(df, boundary, title = NA){
  # function for creating buffer map in ggplot
  # arguments:
  #   df:  buffer polygon layer
  #   boundary: Kasungu district boundary layer
  #   title: main title
  # returns:
  #   a map-element (plots a map)
  ggplot(data = df)+
     geom_sf()+
     geom_sf(data = boundary, fill = NA)+
     theme_classic()+
     labs(title = title)
 }

# For 2017 
buffer1km_2017 <- buffermap(buffer_1km_2017, kasungu_district, title = "2017: 1km Buffers")
buffer2km_2017 <- buffermap(buffer_2km_2017, kasungu_district, title = "2017: 2km Buffers")
buffer3km_2017 <- buffermap(buffer_3km_2017, kasungu_district, title = "2017: 3km Buffers")

# For 2018
buffer1km_2018 <- buffermap(buffer_1km_2018, kasungu_district, title = "2018: 1km Buffers")
buffer2km_2018 <- buffermap(buffer_2km_2018, kasungu_district, title = "2018: 2km Buffers")
buffer3km_2018 <- buffermap(buffer_3km_2018, kasungu_district, title = "2018: 3km Buffers")

# For 2019
buffer1km_2019 <- buffermap(buffer_1km_2019, kasungu_district, title = "2019: 1km Buffers")
buffer2km_2019 <- buffermap(buffer_2km_2019, kasungu_district, title = "2019: 2km Buffers")
buffer3km_2019 <- buffermap(buffer_3km_2019, kasungu_district, title = "2019: 3km Buffers")

# For 2020
buffer1km_2020 <- buffermap(buffer_1km_2020, kasungu_district, title = "2020: 1km Buffers")
buffer2km_2020 <- buffermap(buffer_2km_2020, kasungu_district, title = "2020: 2km Buffers")
buffer3km_2020 <- buffermap(buffer_3km_2020, kasungu_district, title = "2020: 3km Buffers")
 
grid.arrange(buffer1km_2017, buffer1km_2018, buffer1km_2019, buffer1km_2020,
             buffer2km_2017, buffer2km_2018, buffer2km_2019, buffer2km_2020, 
             buffer3km_2017, buffer3km_2018, buffer3km_2019, buffer3km_2020, ncol = 4)
Fig 5. Buffers around open waterbodies in Kasungu

Fig 5. Buffers around open waterbodies in Kasungu

Estimating population living in various distances from open water bodies and in each health facility catchment

Here, we create a function that uses open water bodies, health facility catchment boundary and population datasets to estimate the number of people living within 1km, 2km and 3km buffers surrounding the waterbodies. This involves overlaping and intersecting different data layers (buffers of waterbodies, catchment boundary, population raster, etc), so that we can apportion population from one layer to another. In this model, the layer with population estimate is kasungu_population_* and the target layer that does not have an estimate, but for which we desire one, is kasungu_health_facility_catchment/malire. The function returns an object called finalized that has estimated population within the buffers zones, and multipolygon geometry.

nachulu <- function(water, distance, catchment, raster_population){
  # function to estimate population out of raster and vector layers
  # arguments:
  #    water: waterbody polygon layer
  #    distance: buffer distance in meters
  #    catchment: health facility catchment boundary layer generated from accessibility mapping
  #    raster_population: aggregated population raster layer
  # returns:
  #    finalized: sf objects with a data frame containing estimated population
  
  
  #Buffer the 'water' vector file by 'distance' meters
  buffer_radiusk <- st_buffer(water, distance)
  
  # Dissolve the buffers. Unioning geometries dissolves for instance internal polygon 
  # boundaries, which otherwise would lead to invalid MULTIPOLYGON errors in subsequent analysis.
  buffer_radiusk_union <- sf::st_as_sf(st_cast(st_union(buffer_radiusk),"POLYGON"))
  
  # Split the buffered water file by the boundaries of the catchment area. 
  # We don't want to allocate the attributes in this step 
  int_radiusk <- st_intersection(buffer_radiusk_union, st_geometry(catchment))
  water_int_radiusk <- sf::st_as_sf(int_radiusk)
  
  # Convert the MULTIPOLYGON object into several POLYGON objects
  water_int_radiusk <- st_cast(st_buffer(water_int_radiusk,0.0), "MULTIPOLYGON") %>% 
    st_cast("POLYGON")
  
  # Polygons being seen to be in multiple catchments
  st_intersects(water_int_radiusk, catchment)
  
  # Estimation of population within X kilometer buffer.
  # Extracting zonal statistics from a population raster layer. 
  # The population raster is a continuous gridded surface layer that assign an 
  # estimated population density value to every square in their grid. 
  # The population statistics are then summed and apportioned to the buffer polygons
  water_int_radiusk$pop_est<- raster::extract(raster_population, water_int_radiusk, 
                                              fun = sum, na.rm = TRUE)
  
  # Make the assumption that the attribute is constant throughout the geometry
  st_agr(water_int_radiusk) = "constant"
  st_agr(catchment) = "constant"
  
  # Find which catchment each polygon belongs to using its centroid - a point dataset 
  # representing the geographic center-points of the polygons 
  assign_catchment <- st_intersection(st_centroid(water_int_radiusk), catchment)
  
  
  # Calculated total population living X distance for each facility  
  # Notice that the assign_catchment is comprised of separate POLYGONS (assign_catchment$x). 
  # The first step is to “dissolve” away these POLYGONS into one MULTIPOLYGON. 
  # There is no sf equivalent to the ArcMap “dissolve” operation. 
  # Instead we use a combination of group_by and summarize from the dplyr package. 
  # Stats::aggregate from sf package, and dplyr::summarize both do essentially the same.
  npeople <- assign_catchment %>% dplyr::group_by(DN) %>%
    summarize(pop_estimate = round(sum(pop_est, na.rm = TRUE)))
  
  finalized <- merge(catchment, st_drop_geometry(npeople), by='DN', all.x = TRUE)
  return(out=finalized)
}

Run the model to estimate population living around open waterbodies

Using the nachulu function, here we estimate the number of people surrounding waterbodies in each health facility catchment area using attributes from the open waterbody buffers, health facility catchment boundary and aggregated population raster layers. That is, population living in various distances from open water bodies e.g. 1km, 2km and 3 km is estimated and assigned to health facilities.

# For 2017 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2017<- nachulu(water = surfaceWater_2017, distance = 1000, 
                     catchment = malire_new,
                     raster_population = kasungu_population_2017)

run1k_2017$pop_1km <- run1k_2017$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2017<- nachulu(water = surfaceWater_2017, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2017)

run2k_2017$pop_2km <- run2k_2017$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2017<- nachulu(water = surfaceWater_2017, distance = 3000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2017)

run3k_2017$pop_3km <- run3k_2017$pop_estimate


# For 2018 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2018<- nachulu(water = surfaceWater_2018, distance = 1000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2018)

run1k_2018$pop_1km <- run1k_2018$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2018<- nachulu(water = surfaceWater_2018, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2018)

run2k_2018$pop_2km <- run2k_2018$pop_estimate

# Estimate population living within 3km radius from water bodies
 run3k_2018 <- nachulu(water = surfaceWater_2018, distance = 3000, 
                       catchment = malire_new, 
                       raster_population = kasungu_population_2018)
 
 run3k_2018$pop_3km <- run3k_2018$pop_estimate


# For 2019 ---------------------------------------------------------------------
# Estimate population living within 1km radius from water bodies
run1k_2019 <- nachulu(water = surfaceWater_2019, distance = 1000, 
                      catchment = malire_new, 
                      raster_population = kasungu_population_2019)

run1k_2019$pop_1km <- run1k_2019$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2019<- nachulu(water = surfaceWater_2019, distance = 2000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2019)

run2k_2019$pop_2km <- run2k_2019$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2019<- nachulu(water = surfaceWater_2019, distance = 3000, 
                     catchment = malire_new, 
                     raster_population = kasungu_population_2019)

run3k_2019$pop_3km <- run3k_2019$pop_estimate


# For 2020 ---------------------------------------------------------------------
#  Estimate population living within 1km radius from water bodies
run1k_2020 <- nachulu(water = surfaceWater_2020, distance = 1000, 
                      catchment = malire_new, 
                      raster_population = kasungu_population_2020)

run1k_2020$pop_1km <- run1k_2020$pop_estimate

# Estimate population living within 2km radius from water bodies
run2k_2020 <- nachulu(water = surfaceWater_2020, distance = 2000, 
                      catchment = malire_new, 
                     raster_population = kasungu_population_2020)

run2k_2020$pop_2km <- run2k_2020$pop_estimate

# Estimate population living within 3km radius from water bodies
run3k_2020 <- nachulu(water = surfaceWater_2020, distance = 3000, catchment = malire_new, 
                     raster_population = kasungu_population_2020)

run3k_2020$pop_3km <- run3k_2020$pop_estimate

View the estimated population

Map the outputs from the nachulu function: layers of polygons representing health facility catchment areas, with a field in the attribute table containing the estimated catchment population pop_estimate in 2017, 2018, 2019 and 2020. In areas where the input data is out of data, e.g, no presence of waterbody polygons, the estimated population is missing.

# Function to create maps of estimated population out of sf objects from the nachulu function
estimatedpop <- function(df, vname = "pop_estimate", title, legend.title = NA){
  # estimated population map
  # df: estimated population layer from nachulu function
  # vname: variable name (as character, in qoutes)
  # title: map title in quotes
  # legend.title: legend title in qoutes
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_fill(col = vname, style = "quantile", 
            palette = "Reds", n = 5, title = legend.title)+
    tm_borders()+
    tm_layout(legend.position = c(0.7,"bottom"),
              title.size = .3,
              frame = FALSE)+
    tm_credits(title, position = c(0,0.75), size = 1)
}

run1k_2017_map <- estimatedpop(run1k_2017, title = "2017: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2017_map <- estimatedpop(run2k_2017, title = "2017: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2017_map <- estimatedpop(run3k_2017, title = "2017: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2018_map <- estimatedpop(run1k_2018, title = "2018: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2018_map <- estimatedpop(run2k_2018, title = "2018: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2018_map <- estimatedpop(run3k_2018, title = "2018: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2019_map <- estimatedpop(run1k_2019, title = "2019: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2019_map <- estimatedpop(run2k_2019, title = "2019: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2019_map <- estimatedpop(run3k_2019, title = "2019: 3km buffers", 
                               legend.title = "Estimated \n population")

run1k_2020_map <- estimatedpop(run1k_2020, title = "2020: 1km buffers", 
                               legend.title = "Estimated \n population")

run2k_2020_map <- estimatedpop(run2k_2020, title = "2020: 2km buffers", 
                               legend.title = "Estimated \n population")

run3k_2020_map <- estimatedpop(run3k_2020, title = "2020: 3km buffers", 
                               legend.title = "Estimated \n population")

tmap_arrange(run1k_2017_map, run2k_2017_map, run3k_2017_map, 
             run1k_2018_map, run2k_2018_map, run3k_2018_map,
             run1k_2019_map, run2k_2019_map, run3k_2019_map,
             run1k_2020_map, run2k_2020_map, run3k_2020_map, ncol = 3)
Fig 6. Estimated population in Kasungu health facility catchments

Fig 6. Estimated population in Kasungu health facility catchments

Merge estimated population within the various buffers into a single data frame

First, we need to convert the sf outputs from the nachulu function to a plain data frame with as.data.frame, which drops the geometry and gives back a plain data frame

# Convert sf objects to plain data frame
# 2017 -------------------------------------------------------------------------
run1k_2017_df <- as.data.frame(run1k_2017)[,c("DN", "pop_1km")]
run2k_2017_df <- as.data.frame(run2k_2017)[,c("DN", "pop_2km")]
run3k_2017_df <- as.data.frame(run3k_2017)[,c("DN", "pop_3km")]

# 2018 -------------------------------------------------------------------------
run1k_2018_df <- as.data.frame(run1k_2018)[,c("DN", "pop_1km")]
run2k_2018_df <- as.data.frame(run2k_2018)[,c("DN", "pop_2km")]
run3k_2018_df <- as.data.frame(run3k_2018)[,c("DN", "pop_3km")]

# 2019 -------------------------------------------------------------------------
run1k_2019_df <- as.data.frame(run1k_2019)[,c("DN", "pop_1km")]
run2k_2019_df <- as.data.frame(run2k_2019)[,c("DN", "pop_2km")]
run3k_2019_df <- as.data.frame(run3k_2019)[,c("DN", "pop_3km")]

# 2020 -------------------------------------------------------------------------
run1k_2020_df <- as.data.frame(run1k_2020)[,c("DN", "pop_1km")]
run2k_2020_df <- as.data.frame(run2k_2020)[,c("DN", "pop_2km")]
run3k_2020_df <- as.data.frame(run3k_2020)[,c("DN", "pop_3km")]

# Merge the data frames. NB: one important variable 'dry season malaria cases' 
# is yet to be added to the data frames
finalized_2017_df <- cbind.data.frame(run1k_2017_df, run2k_2017_df, run3k_2017_df)

finalized_2018_df <- cbind.data.frame(run1k_2018_df, run2k_2018_df, run3k_2018_df)

finalized_2019_df <- cbind.data.frame(run1k_2019_df, run2k_2019_df, run3k_2019_df)

finalized_2020_df <- cbind.data.frame(run1k_2020_df, run2k_2020_df, run3k_2020_df)


# Exclude unnecessary columns and rows
# 2017 -------------------------------------------------------------------------
finalized_2017_df <- finalized_2017_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2017_df <- cbind("Fid" = rownames(finalized_2017_df), finalized_2017_df)

# 2018 -------------------------------------------------------------------------
finalized_2018_df <- finalized_2018_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")]

finalized_2018_df <- cbind("Fid" = rownames(finalized_2018_df), finalized_2018_df)

# 2019 -------------------------------------------------------------------------
finalized_2019_df <- finalized_2019_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2019_df <- cbind("Fid" = rownames(finalized_2019_df), finalized_2019_df)

# 2020 -------------------------------------------------------------------------
finalized_2020_df <- finalized_2020_df[,c("DN", "pop_1km", "pop_2km", "pop_3km")] 
             
finalized_2020_df <- cbind("Fid" = rownames(finalized_2020_df), finalized_2020_df) 

Tidy and merge the estimated population data with the malaria data

The estimated population within 1km, 2km and 3km buffers around water bodies is now being merged with the 2017 to 2020 dry season malaria dataframe using a user-defined function: organize

# Create a function that merges and tidy the population and malaria dataframes 
organize <- function(malaria_data, pop_data){
  
  # function to merge, tidy dry season malaria and estimated population datasets
  # arguments:
  #   malaria_data: sf point object with a data frame containing aggregated 
  #                 malaria data at health facilities level
  #   pop_data: sf polygon object with a data frame containing the estimated population
  # returns:
  #   hfc_malaria_pop: sf objects with a data frame containing dry season malaria and estimated population


  # Convert sf objects to spatial
  estimated_pop_shp <- as(pop_data, "Spatial")
  malaria_shp <- as(malaria_data, "Spatial")

  # Match CRS
  malaria_shp <- spTransform(malaria_shp, crs(estimated_pop_shp))

  # Overlay aggregated health facility points and extract estimated population
  # Using 'point.in.poly' to return a point spatial object, in this case location of health facilities
  # and estimated population instead of sp::over function, which simply returns 
  # a data frame, with the same no. rows.
  # Argument 'sp = TRUE' returns an sp class object, else returns sf class object
  health_facilities_pop <- spatialEco::point.in.poly(malaria_shp, estimated_pop_shp, sp = TRUE) 
 
  # head(health_facilities_pop@data)

  # Add the extracted ID, health facility names, dry season malaria cases and estimated population to 
  # the health facility catchments (hfc)
  hfc_pop_malaria_shp <- merge(estimated_pop_shp, health_facilities_pop, 
                               by.x = "FID", by.y = "DN")

  # Convert the merged shapefile containing estimated population and malaria data to sf-object
  hfc_pop_malaria_sf <- sf::st_as_sf(hfc_pop_malaria_shp)

  # Tidy the data by reordering columns and dropping those not needed
  # First, get column names
  # colnames(hfc_pop_malaria_sf)
  # [1] "DN"           "pop_1km"      "pop_estimate" "FID"          "Names"        "dr_2017"     
  # [7] "dr_2018"      "dr_2019"      "dr_2020"      "coords.x1"    "coords.x2"    "geometry"

  hfc_pop_malaria_sf_trim <- hfc_pop_malaria_sf %>% 
    dplyr::select(-c(pop_estimate, coords.x1, coords.x2))
 
  # Reorder columns by position
  hfc_malaria_pop_sf <- hfc_pop_malaria_sf_trim[, c(4, 3, 1, 2, 5, 6, 7, 8, 9)]

  return(out = hfc_malaria_pop_sf)

}

# Invoking the function 
# 2017 data -------------------------------------------------------------------------------
hfc_1km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2017)
hfc_2km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2017)
hfc_3km_2017_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2017)

# 2018 data -------------------------------------------------------------------------------
hfc_1km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2018)
hfc_2km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2018)
hfc_3km_2018_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2018)

# 2019 data -------------------------------------------------------------------------------
hfc_1km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2019) 
hfc_2km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2019)
hfc_3km_2019_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2019)

# 20 20 data ------------------------------------------------------------------------------
hfc_1km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run1k_2020)
hfc_2km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run2k_2020)
hfc_3km_2020_sf <- organize(malaria_data = health_facility_aggr_sf, pop_data = run3k_2020)

# Check if merging and tidying worked
# Create a user-defined function that maps the merged sf objects
malaria_map <- function(df, year = NA, title, legend.title = NA){
  # dry season malaria cases map
  # df: sf objects from 'organize' function
  # vname: variable name (as character, in qoutes)
  # title: map title in quotes
  # legend.title: legend title in qoutes
  # returns:
  #   a tmap-element (plots a map)
  tm_shape(df)+
    tm_fill(col = year, palette = "Oranges", n = 5, title = legend.title,
            breaks = c(400, 4000, 10000, 16000, 20000, 30000 ))+
    tm_borders("burlywood")+
    tm_layout(legend.position = c(0.6, "bottom"),
              legend.text.size = 0.6,
              legend.title.size = 0.8,
              title.size = .3,
              frame = FALSE)+
     tm_credits(title, position = c(0.2, 0.8), size = .9)+
    tm_view(view.legend.position = c("right", "bottom"))
}

dry_season_malaria_2017 <- malaria_map(hfc_1km_2017_sf, year = "dr_2017", title = "2017", 
                                       legend.title = "2017 malaria cases")

dry_season_malaria_2018 <- malaria_map(hfc_1km_2018_sf, year = "dr_2018", title = "2018",
                                       legend.title = "2018 malaria cases")

dry_season_malaria_2019 <- malaria_map(hfc_1km_2019_sf, year = "dr_2019", title = "2019",
                                       legend.title = "2019 malaria cases")

dry_season_malaria_2020 <- malaria_map(hfc_1km_2020_sf, year = "dr_2020", title = "2020",
                                       legend.title = "2020 malaria cases")

tmap::tmap_arrange(dry_season_malaria_2017, dry_season_malaria_2018,
                   dry_season_malaria_2019, dry_season_malaria_2020, ncol = 2)
Fig. 7: Dry season malaria cases, Kasungu

Fig. 7: Dry season malaria cases, Kasungu

# Rename population columns ----------------------------------------------------------------
hfc_1km_2017_sf <-  hfc_1km_2017_sf %>% 
  dplyr::rename(pop_1km_2017 = pop_1km)

hfc_2km_2017_sf <- hfc_2km_2017_sf %>% 
  dplyr::rename(pop_2km_2017 = pop_2km)

hfc_3km_2017_sf <- hfc_3km_2017_sf %>% 
  dplyr::rename(pop_3km_2017 = pop_3km)

hfc_1km_2018_sf <- hfc_1km_2018_sf %>% 
  dplyr::rename(pop_1km_2018 = pop_1km)

hfc_2km_2018_sf <- hfc_2km_2018_sf %>% 
  dplyr::rename(pop_2km_2018 = pop_2km)

hfc_3km_2018_sf <- hfc_3km_2018_sf %>% 
  dplyr::rename(pop_3km_2018 = pop_3km)

hfc_1km_2019_sf <- hfc_1km_2019_sf %>% 
  dplyr::rename(pop_1km_2019 = pop_1km)

hfc_2km_2019_sf <- hfc_2km_2019_sf %>% 
  dplyr::rename(pop_2km_2019 = pop_2km)

hfc_3km_2019_sf <- hfc_3km_2019_sf %>% 
  dplyr::rename(pop_3km_2019 = pop_3km)

hfc_1km_2020_sf <- hfc_1km_2020_sf %>% 
  dplyr::rename(pop_1km_2020 = pop_1km)

hfc_2km_2020_sf <- hfc_2km_2020_sf %>% 
  dplyr::rename(pop_2km_2020 = pop_2km)

hfc_3km_2020_sf <- hfc_3km_2020_sf %>% 
  dplyr::rename(pop_3km_2020 = pop_3km)

# Save the sf polygons ---------------------------------------------------------------------
# sf::st_write(hfc_1km_2017_sf, "data/hfc_1km_2017.shp")
# sf::st_write(hfc_2km_2017_sf, "data/hfc_2km_2017.shp")
# sf::st_write(hfc_3km_2017_sf, "data/hfc_3km_2017.shp")
# 
# sf::st_write(hfc_1km_2018_sf, "data/hfc_1km_2018.shp")
# sf::st_write(hfc_2km_2018_sf, "data/hfc_2km_2018.shp")
# sf::st_write(hfc_3km_2018_sf, "data/hfc_3km_2018.shp")
# 
# sf::st_write(hfc_1km_2019_sf, "data/hfc_1km_2019.shp")
# sf::st_write(hfc_2km_2019_sf, "data/hfc_2km_2019.shp")
# sf::st_write(hfc_3km_2019_sf, "data/hfc_3km_2019.shp")
# 
# sf::st_write(hfc_1km_2020_sf, "data/hfc_1km_2020.shp")
# sf::st_write(hfc_2km_2020_sf, "data/hfc_2km_2020.shp")
# sf::st_write(hfc_3km_2020_sf, "data/hfc_3km_2020.shp")

Box plot of Kasungu dry season malaria cases from 2017 to 2020

dry_season_malaria <- as(hfc_1km_2017_sf, "Spatial")

ggplot2::ggplot(reshape2::melt(dry_season_malaria@data[, c("dr_2017", "dr_2018", "dr_2019", "dr_2020")]), 
  aes(variable, value)) +
  geom_boxplot() +
  xlab("Years") +
  ylab("Malaria cases") +
  theme_classic() 
Fig. 8: Dry season malaria cases from 2017 - 2020 in Kasungu

Fig. 8: Dry season malaria cases from 2017 - 2020 in Kasungu

Calculate the expected number of malaria cases in health facility catchments

Now, we combine all the dataframes with estimated population and malaria cases into one dataframe kasungu_model_df, and new columns (totalMalaria_year, totalPop_year, malariaProp_year, and expectedMalaria_year) are then added. The aim is to determine the standardised mortality/morbidity ratio (SMR). SMR compares the risk of morbidity/mortality in a population of interest with that of a standard population. In this case, interest in understanding whether the number of dry season malaria cases in each health facility catchment are greater than we would expect given the rate of malaria in the whole of Kasungu district.

# 2017 dataframes --------------------------------------------------------------
hfc_1km_2017_df <- hfc_1km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(Names, FID, pop_1km_2017,
                                       dr_2017, dr_2018, dr_2019, dr_2020, geometry)
 
hfc_2km_2017_df <- hfc_2km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2017)

hfc_3km_2017_df <- hfc_3km_2017_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2017)

# 2018 dataframes --------------------------------------------------------------
hfc_1km_2018_df <- hfc_1km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2018)

hfc_2km_2018_df <- hfc_2km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2018)

hfc_3km_2018_df <- hfc_3km_2018_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2018)

# 2019 dataframes --------------------------------------------------------------
hfc_1km_2019_df <- hfc_1km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2019)

hfc_2km_2019_df <- hfc_2km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2019)

hfc_3km_2019_df <- hfc_3km_2019_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2019)

# 2020 dataframes --------------------------------------------------------------
hfc_1km_2020_df <- hfc_1km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_1km_2020)

hfc_2km_2020_df <- hfc_2km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_2km_2020)

hfc_3km_2020_df <- hfc_3km_2020_sf %>% unclass %>% 
  dplyr::as_tibble() %>% dplyr::select(FID, pop_3km_2020)

# Bind the dataframes and create new columns containing the total number of malaria
# cases (totalMalaria_year) throughout the whole district for each year, 
# total population of the whole district (totalPop_year) for each year, and 
# expected number of malaria cases (expectedMalaria_year) in each catchment,
# which is calculated by dividing totalMalaria_year with totalPop_year and multiplied
# by the number of people in each catchment
# 
kasungu_model_df <- cbind(hfc_1km_2017_df, hfc_2km_2017_df, hfc_3km_2017_df,
                          hfc_1km_2018_df, hfc_2km_2018_df, hfc_3km_2018_df,
                          hfc_1km_2019_df, hfc_2km_2019_df, hfc_3km_2019_df,
                          hfc_1km_2020_df, hfc_2km_2020_df, hfc_3km_2020_df) %>% 
  dplyr::select(-FID) %>%
  dplyr::mutate(totalMalaria_2017 = sum(dr_2017),
                totalMalaria_2018 = sum(dr_2018),
                totalMalaria_2019 = sum(dr_2019),
                totalMalaria_2020 = sum(dr_2020)) %>% 
  dplyr::mutate(totalPop_1km_2017 = round(sum(pop_1km_2017, na.rm = TRUE)),
                totalPop_2km_2017 = round(sum(pop_2km_2017, na.rm = TRUE)),
                totalPop_3km_2017 = round(sum(pop_3km_2017, na.rm = TRUE)),
                totalPop_1km_2018 = round(sum(pop_1km_2018, na.rm = TRUE)),
                totalPop_2km_2018 = round(sum(pop_2km_2018, na.rm = TRUE)),
                totalPop_3km_2018 = round(sum(pop_3km_2018, na.rm = TRUE)),
                totalPop_1km_2019 = round(sum(pop_1km_2019, na.rm = TRUE)),
                totalPop_2km_2019 = round(sum(pop_2km_2019, na.rm = TRUE)),
                totalPop_3km_2019 = round(sum(pop_3km_2019, na.rm = TRUE)),
                totalPop_1km_2020 = round(sum(pop_1km_2020, na.rm = TRUE)),
                totalPop_2km_2020 = round(sum(pop_2km_2020, na.rm = TRUE)),
                totalPop_3km_2020 = round(sum(pop_3km_2020, na.rm = TRUE))) %>% 
  dplyr::mutate(malariaProp_1km_2017 = round(totalMalaria_2017/totalPop_1km_2017, 1),
                malariaProp_2km_2017 = round(totalMalaria_2017/totalPop_2km_2017, 1),
                malariaProp_3km_2017 = round(totalMalaria_2017/totalPop_3km_2017, 1),
                malariaProp_1km_2018 = round(totalMalaria_2018/totalPop_1km_2018, 1),
                malariaProp_2km_2018 = round(totalMalaria_2018/totalPop_2km_2018, 1),
                malariaProp_3km_2018 = round(totalMalaria_2018/totalPop_3km_2018, 1),
                malariaProp_1km_2019 = round(totalMalaria_2019/totalPop_1km_2019, 1),
                malariaProp_2km_2019 = round(totalMalaria_2019/totalPop_2km_2019, 1),
                malariaProp_3km_2019 = round(totalMalaria_2019/totalPop_3km_2019, 1),
                malariaProp_1km_2020 = round(totalMalaria_2020/totalPop_1km_2020, 1),
                malariaProp_2km_2020 = round(totalMalaria_2020/totalPop_2km_2020, 1),
                malariaProp_3km_2020 = round(totalMalaria_2020/totalPop_3km_2020, 1)) %>% 
  dplyr::rowwise() %>%
  dplyr::mutate(expectedMalaria_1km_2017 = round(malariaProp_1km_2017*pop_1km_2017),
                expectedMalaria_2km_2017 = round(malariaProp_2km_2017*pop_2km_2017),
                expectedMalaria_3km_2017 = round(malariaProp_3km_2017*pop_3km_2017),
                expectedMalaria_1km_2018 = round(malariaProp_1km_2018*pop_1km_2018),
                expectedMalaria_2km_2018 = round(malariaProp_2km_2018*pop_2km_2018),
                expectedMalaria_3km_2018 = round(malariaProp_3km_2018*pop_3km_2018),
                expectedMalaria_1km_2019 = round(malariaProp_1km_2019*pop_1km_2019),
                expectedMalaria_2km_2019 = round(malariaProp_2km_2019*pop_2km_2019),
                expectedMalaria_3km_2019 = round(malariaProp_3km_2019*pop_3km_2019),
                expectedMalaria_1km_2020 = round(malariaProp_1km_2020*pop_1km_2020),
                expectedMalaria_2km_2020 = round(malariaProp_2km_2020*pop_2km_2020),
                expectedMalaria_3km_2020 = round(malariaProp_3km_2020*pop_3km_2020))

# Take a glimpse at the data
table <- kasungu_model_df %>%
  dplyr::select(Names, observed_2017 = `dr_2017`, observed_2018 = `dr_2018`, 
                observed_2019 = `dr_2019`, observed_2020 = `dr_2020`, 
                expectedMalaria_1km_2017, expectedMalaria_2km_2017, expectedMalaria_3km_2017, 
                expectedMalaria_1km_2018, expectedMalaria_2km_2018, expectedMalaria_3km_2018, 
                expectedMalaria_1km_2019, expectedMalaria_2km_2019, expectedMalaria_3km_2019, 
                expectedMalaria_1km_2020, expectedMalaria_2km_2020, expectedMalaria_3km_2020) %>% 
  kable %>%
  kableExtra::kable_styling(full_width = FALSE)

table
Names observed_2017 observed_2018 observed_2019 observed_2020 expectedMalaria_1km_2017 expectedMalaria_2km_2017 expectedMalaria_3km_2017 expectedMalaria_1km_2018 expectedMalaria_2km_2018 expectedMalaria_3km_2018 expectedMalaria_1km_2019 expectedMalaria_2km_2019 expectedMalaria_3km_2019 expectedMalaria_1km_2020 expectedMalaria_2km_2020 expectedMalaria_3km_2020
Anchor Farm 2966 3142 1978 2837 6375 5012 5108 7078 5974 5349 7955 5760 7780 9997 9144 8071
Chamama Health Facility 1878 1750 1508 1490 559 1116 1314 777 1266 1326 253 540 762 916 1466 1463
Chamwabvi Health Centre 3601 4027 1686 1386 4478 5438 6019 6466 7414 7016 3959 3598 5178 7628 9654 8856
Chinyama 2292 2116 1770 2411 2419 2267 2334 2309 2200 1909 1124 952 1299 3128 3270 2968
Chulu Health Centre 5801 4502 5470 9628 3449 3945 5172 4070 5819 5739 4475 3687 5228 8843 9354 8182
Dwangwa Dispensary 2192 2394 2553 4196 4461 5617 6976 4636 6024 6112 4345 4136 5792 6460 9765 9617
Gogode Dispensary 2745 4138 2200 3957 1304 1870 2027 918 1467 1763 839 600 1137 2142 2995 2643
Kamboni Health Centre 3824 4745 2770 6265 2135 2461 2805 5097 3866 3392 3536 2225 3326 3524 5047 4417
Kapelula Health Centre 4138 5338 2558 7894 1124 1686 2251 4622 5006 5715 2501 2758 4289 NA NA NA
Kasungu District Hospital 20817 20314 13450 28792 43386 32189 31995 36015 36144 28346 22125 16800 26026 62221 51068 44270
Kawamba Health Centre 5808 6462 4402 7949 2540 3001 4010 7500 5986 4926 4987 3038 3757 6233 7273 6368
Khola Health Centre 2195 2802 2708 4918 627 763 971 1160 1211 1517 932 478 1052 1636 1908 1724
Linyangwa Health Centre 3359 3268 3330 6116 1571 2051 2932 1188 1621 2094 1573 1641 2821 2151 3058 3703
Livwezi Health Centre 1307 2361 787 1339 3004 2705 3510 2339 2764 3685 2347 1984 3310 3254 3281 4172
Lodjwa Health Centre 1161 1151 1713 1955 1714 422 775 NA NA NA 609 73 219 148 359 505
Lodjwa Health Centre 1161 1151 1713 1955 1714 422 775 NA NA NA 609 73 219 148 359 505
Mdunga Health Centre 2045 2923 1008 3332 NA NA NA 1989 1575 2425 1194 1237 1886 NA NA NA
Mkhota Health Centre 2586 5920 2162 6070 1926 2048 2092 4988 3967 4131 3116 2286 3803 5949 5481 5860
Mnyanja Health Centre 3298 2864 3148 7259 923 1229 2630 1434 2065 3543 1312 1395 2965 NA 181 735
Mpepa /Chisinga Health Centre 2784 3602 4248 7588 NA NA NA 1512 2331 2642 462 654 1126 NA NA NA
Mtunthama Health Centre 3622 5308 2763 2901 6990 7149 6068 6215 6460 4720 3514 2927 3463 8435 10620 7602
Mziza Health Centre 7352 4619 4179 8591 10953 9880 11185 10296 8556 9412 8627 7106 10057 18045 13228 11584
Newa Mpasazi Health Centre 427 749 809 3318 275 547 718 216 466 633 431 538 1283 680 915 1152
Newa Mpasazi Health Centre 427 749 809 3318 275 547 718 216 466 633 431 538 1283 680 915 1152
Nkhamenya Rural Hospital 3639 4540 4537 9516 2802 4191 5295 1530 1610 2024 2709 2692 5247 4975 7967 9598
Ofesi Health Centre 4036 3446 4079 4311 NA NA 196 724 772 865 974 1275 2157 NA NA 275
Santhe Health Centre 5838 7267 5210 7242 296 473 472 589 1590 1479 977 842 1233 1285 1981 1919
Simlemba Health Centre 2197 2515 2339 8051 926 1303 1840 1150 1352 1894 1837 1776 3128 1647 2451 3023
Wimbe Health Centre 5660 5010 3041 3690 4065 3661 3726 2973 3127 2795 1417 1256 1886 5324 5179 4533

Save Kasungu modeldata

write.csv(kasungu_model_df, file = "data/Kasungu_model_data_2017_2020.csv")

Model fitting

We can then model the observed outcomes using Poisson regression with an offset of the log of the expected counts expectedMalaria. Poisson regression framework has been used as it is suitable for modeling count outcomes.

# Defining model parameters
# dr_2017, dr_2018, dr_2019 and dr_2020 are recorded dry season malaria cases in Kasungu
# malariaProp_1km, malariaProp_2km and malariaProp_3km are the malaria proportion 
# (total number of malaria cases divided by total population) within 1km, 2km and 3km buffers respectively
# expectedMalaria is the number of malaria cases we would expect given the rate of malaria, and is 
# calculated by dividing total malaria cases with total population and multiplied
# by the number of people in each health facility.

# 2017 -----------------------------------------------------------------------------------
model_1km_2017 <- glm(dr_2017~malariaProp_1km_2017, offset = log(expectedMalaria_1km_2017),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)
# alias(model_1km_2017)
# sjPlot::tab_model(model_1km_2017)
summary(model_1km_2017)
## 
## Call:
## glm(formula = dr_2017 ~ malariaProp_1km_2017, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_1km_2017))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -103.21   -10.55    19.45    43.17   157.44  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.095046   0.003158   -30.1   <2e-16 ***
## malariaProp_1km_2017        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 69474  on 25  degrees of freedom
## Residual deviance: 69474  on 25  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 69730
## 
## Number of Fisher Scoring iterations: 5
model_2km_2017 <- glm(dr_2017~malariaProp_2km_2017, offset = log(expectedMalaria_2km_2017),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_2km_2017)
## 
## Call:
## glm(formula = dr_2017 ~ malariaProp_2km_2017, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_2km_2017))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -64.99  -20.71   20.53   29.95  137.09  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.016828   0.003158  -5.329 9.86e-08 ***
## malariaProp_2km_2017        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 46397  on 25  degrees of freedom
## Residual deviance: 46397  on 25  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 46653
## 
## Number of Fisher Scoring iterations: 5
model_3km_2017 <- glm(dr_2017~malariaProp_3km_2017, offset = log(expectedMalaria_3km_2017),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_3km_2017)
## 
## Call:
## glm(formula = dr_2017 ~ malariaProp_3km_2017, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_3km_2017))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -60.86  -22.52   14.53   21.16  139.94  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.087914   0.003096   -28.4   <2e-16 ***
## malariaProp_3km_2017        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 56564  on 26  degrees of freedom
## Residual deviance: 56564  on 26  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 56831
## 
## Number of Fisher Scoring iterations: 5
# 2018 -----------------------------------------------------------------------------------
model_1km_2018 <- glm(dr_2018~malariaProp_1km_2018, offset = log(expectedMalaria_1km_2018),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_1km_2018)
## 
## Call:
## glm(formula = dr_2018 ~ malariaProp_1km_2018, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_1km_2018))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -88.527   -7.684   20.021   38.109  152.619  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.009673   0.002925  -3.307 0.000943 ***
## malariaProp_1km_2018        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 68744  on 26  degrees of freedom
## Residual deviance: 68744  on 26  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 69016
## 
## Number of Fisher Scoring iterations: 5
model_2km_2018 <- glm(dr_2018~malariaProp_2km_2018, offset = log(expectedMalaria_2km_2018),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_2km_2018)
## 
## Call:
## glm(formula = dr_2018 ~ malariaProp_2km_2018, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_2km_2018))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -84.676   -9.049   14.182   32.486  105.534  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.035538   0.002925  -12.15   <2e-16 ***
## malariaProp_2km_2018        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 47583  on 26  degrees of freedom
## Residual deviance: 47583  on 26  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 47855
## 
## Number of Fisher Scoring iterations: 5
model_3km_2018 <- glm(dr_2018~malariaProp_3km_2018, offset = log(expectedMalaria_3km_2018),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_3km_2018)
## 
## Call:
## glm(formula = dr_2018 ~ malariaProp_3km_2018, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_3km_2018))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -55.46  -14.83    9.45   24.51  107.16  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)          0.006748   0.002925   2.307   0.0211 *
## malariaProp_3km_2018       NA         NA      NA       NA  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 37891  on 26  degrees of freedom
## Residual deviance: 37891  on 26  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 38163
## 
## Number of Fisher Scoring iterations: 5
# 2019 ----------------------------------------------------------------------------------
model_1km_2019 <- glm(dr_2019~malariaProp_1km_2019, offset = log(expectedMalaria_1km_2019),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_1km_2019)
## 
## Call:
## glm(formula = dr_2019 ~ malariaProp_1km_2019, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_1km_2019))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -80.10  -13.24   16.28   38.61  106.29  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)          -0.002718   0.003353   -0.81    0.418
## malariaProp_1km_2019        NA         NA      NA       NA
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 59895  on 28  degrees of freedom
## Residual deviance: 59895  on 28  degrees of freedom
## AIC: 60178
## 
## Number of Fisher Scoring iterations: 5
model_2km_2019 <- glm(dr_2019~malariaProp_2km_2019, offset = log(expectedMalaria_2km_2019),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_2km_2019)
## 
## Call:
## glm(formula = dr_2019 ~ malariaProp_2km_2019, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_2km_2019))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -71.32  -14.11   11.07   33.91   92.47  
## 
## Coefficients: (1 not defined because of singularities)
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          0.199219   0.003353   59.41   <2e-16 ***
## malariaProp_2km_2019       NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 59841  on 28  degrees of freedom
## Residual deviance: 59841  on 28  degrees of freedom
## AIC: 60123
## 
## Number of Fisher Scoring iterations: 5
model_3km_2019 <- glm(dr_2019~malariaProp_3km_2019, offset = log(expectedMalaria_3km_2019),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_3km_2019)
## 
## Call:
## glm(formula = dr_2019 ~ malariaProp_3km_2019, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_3km_2019))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -62.572  -15.341    5.495   34.836   94.533  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.228097   0.003353  -68.02   <2e-16 ***
## malariaProp_3km_2019        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 50090  on 28  degrees of freedom
## Residual deviance: 50090  on 28  degrees of freedom
## AIC: 50372
## 
## Number of Fisher Scoring iterations: 5
# 2020 ----------------------------------------------------------------------------------
model_1km_2020 <- glm(dr_2020~malariaProp_1km_2020, offset = log(expectedMalaria_1km_2020),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_1km_2020)
## 
## Call:
## glm(formula = dr_2020 ~ malariaProp_1km_2020, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_1km_2020))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -110.67   -19.95    30.26    78.90   123.86  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.182199   0.002693  -67.66   <2e-16 ***
## malariaProp_1km_2020        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 112127  on 23  degrees of freedom
## Residual deviance: 112127  on 23  degrees of freedom
##   (5 observations deleted due to missingness)
## AIC: 112374
## 
## Number of Fisher Scoring iterations: 5
model_2km_2020 <- glm(dr_2020~malariaProp_2km_2020, offset = log(expectedMalaria_2km_2020),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_2km_2020)
## 
## Call:
## glm(formula = dr_2020 ~ malariaProp_2km_2020, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_2km_2020))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -95.00  -28.44   19.64   62.35  203.52  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.139741   0.002625  -53.24   <2e-16 ***
## malariaProp_2km_2020        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 118354  on 24  degrees of freedom
## Residual deviance: 118354  on 24  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 118611
## 
## Number of Fisher Scoring iterations: 5
model_3km_2020 <- glm(dr_2020~malariaProp_3km_2020, offset = log(expectedMalaria_3km_2020),
                      data = kasungu_model_df, family = 'poisson', na.action = na.omit)

summary(model_3km_2020)
## 
## Call:
## glm(formula = dr_2020 ~ malariaProp_3km_2020, family = "poisson", 
##     data = kasungu_model_df, na.action = na.omit, offset = log(expectedMalaria_3km_2020))
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -96.31  -21.79   20.43   52.49  143.76  
## 
## Coefficients: (1 not defined because of singularities)
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          -0.035725   0.002587  -13.81   <2e-16 ***
## malariaProp_3km_2020        NA         NA      NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 99193  on 25  degrees of freedom
## Residual deviance: 99193  on 25  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 99461
## 
## Number of Fisher Scoring iterations: 5

Scatter plot

Check how well the fitted values (i.e. predictions at data points) line up with the observations.

# 2017 model ------------------------------------------------------------------
# plot fitted versus observed values
ggplot() + geom_point(aes(model_1km_2019$fitted.values, kasungu_model_df$dr_2019))

Create maps

pop_malaria_2017_2020 <- raster::shapefile(here::here("data/Kasungu_pop_malaria_2017_2020merge"))

pop_malaria_2017_2020$fitted_1km2019 <- round(model_1km_2019$fitted.values)

# mapview::mapview(pop_malaria_2017_2020, zcol = "fitted_1km2019")